{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "6f150155",
   "metadata": {},
   "outputs": [],
   "source": [
    "from tsfresh.examples.robot_execution_failures import download_robot_execution_failures,load_robot_execution_failures\n",
    "from tsfresh import extract_features,select_features\n",
    "import pandas as pd"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "0d9c9e23",
   "metadata": {},
   "source": [
    "注意：由于国内网络的限制，直接运行下一步的下载代码可能会连接失败，此时有两个解决办法：  \n",
    "1）在该地址 https://github.com/MaxBenChrist/robot-failure-dataset  手动下载lp1.data  \n",
    "2）在网站 https://www.ipaddress.com 查询域名 raw.githubusercontent.com 的真实 IP，然后在 C:\\Windows\\System32\\drivers\\etc 下的 hosts 文件中添加类似这样的几行（每个 IP 一行），例如：`185.199.108.133 raw.githubusercontent.com`  "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "4f54b343",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Download the robot execution failures dataset (needs network access), then load it.\n",
    "# `timeseries` is the long-format frame displayed in the next cell; `y` is the loader's second\n",
    "# return value (presumably the per-id target labels -- confirm against the tsfresh docs).\n",
    "download_robot_execution_failures() # download the data\n",
    "timeseries, y = load_robot_execution_failures() # load the data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "b4ec6337",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>id</th>\n",
       "      <th>time</th>\n",
       "      <th>F_x</th>\n",
       "      <th>F_y</th>\n",
       "      <th>F_z</th>\n",
       "      <th>T_x</th>\n",
       "      <th>T_y</th>\n",
       "      <th>T_z</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>-1</td>\n",
       "      <td>-1</td>\n",
       "      <td>63</td>\n",
       "      <td>-3</td>\n",
       "      <td>-1</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>62</td>\n",
       "      <td>-3</td>\n",
       "      <td>-1</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>-1</td>\n",
       "      <td>-1</td>\n",
       "      <td>61</td>\n",
       "      <td>-3</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>1</td>\n",
       "      <td>3</td>\n",
       "      <td>-1</td>\n",
       "      <td>-1</td>\n",
       "      <td>63</td>\n",
       "      <td>-2</td>\n",
       "      <td>-1</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>1</td>\n",
       "      <td>4</td>\n",
       "      <td>-1</td>\n",
       "      <td>-1</td>\n",
       "      <td>63</td>\n",
       "      <td>-3</td>\n",
       "      <td>-1</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1315</th>\n",
       "      <td>88</td>\n",
       "      <td>10</td>\n",
       "      <td>-10</td>\n",
       "      <td>2</td>\n",
       "      <td>39</td>\n",
       "      <td>-21</td>\n",
       "      <td>-24</td>\n",
       "      <td>5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1316</th>\n",
       "      <td>88</td>\n",
       "      <td>11</td>\n",
       "      <td>-11</td>\n",
       "      <td>2</td>\n",
       "      <td>38</td>\n",
       "      <td>-24</td>\n",
       "      <td>-22</td>\n",
       "      <td>6</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1317</th>\n",
       "      <td>88</td>\n",
       "      <td>12</td>\n",
       "      <td>-12</td>\n",
       "      <td>3</td>\n",
       "      <td>23</td>\n",
       "      <td>-24</td>\n",
       "      <td>-24</td>\n",
       "      <td>5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1318</th>\n",
       "      <td>88</td>\n",
       "      <td>13</td>\n",
       "      <td>-13</td>\n",
       "      <td>4</td>\n",
       "      <td>26</td>\n",
       "      <td>-29</td>\n",
       "      <td>-27</td>\n",
       "      <td>5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1319</th>\n",
       "      <td>88</td>\n",
       "      <td>14</td>\n",
       "      <td>-13</td>\n",
       "      <td>2</td>\n",
       "      <td>15</td>\n",
       "      <td>-25</td>\n",
       "      <td>-25</td>\n",
       "      <td>6</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>1320 rows × 8 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "      id  time  F_x  F_y  F_z  T_x  T_y  T_z\n",
       "0      1     0   -1   -1   63   -3   -1    0\n",
       "1      1     1    0    0   62   -3   -1    0\n",
       "2      1     2   -1   -1   61   -3    0    0\n",
       "3      1     3   -1   -1   63   -2   -1    0\n",
       "4      1     4   -1   -1   63   -3   -1    0\n",
       "...   ..   ...  ...  ...  ...  ...  ...  ...\n",
       "1315  88    10  -10    2   39  -21  -24    5\n",
       "1316  88    11  -11    2   38  -24  -22    6\n",
       "1317  88    12  -12    3   23  -24  -24    5\n",
       "1318  88    13  -13    4   26  -29  -27    5\n",
       "1319  88    14  -13    2   15  -25  -25    6\n",
       "\n",
       "[1320 rows x 8 columns]"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Display the loaded frame; the recorded output has 1320 rows x 8 columns (id, time and the six value columns F_x..T_z).\n",
    "timeseries"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "id": "0771335d",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Index(['id', 'time', 'F_x', 'F_y', 'F_z', 'T_x', 'T_y', 'T_z'], dtype='object')"
      ]
     },
     "execution_count": 15,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "timeseries.columns # 8 columns: id marks which time series a row belongs to (it is later passed as column_id to extract_features), time is the time axis, and the other 6 columns hold the values of the different series dimensions"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "id": "6d1bfd0b",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Feature Extraction: 100%|██████████████████████████████████████████████████████████████| 20/20 [00:12<00:00,  1.64it/s]\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>T_x__variance_larger_than_standard_deviation</th>\n",
       "      <th>T_x__has_duplicate_max</th>\n",
       "      <th>T_x__has_duplicate_min</th>\n",
       "      <th>T_x__has_duplicate</th>\n",
       "      <th>T_x__sum_values</th>\n",
       "      <th>T_x__abs_energy</th>\n",
       "      <th>T_x__mean_abs_change</th>\n",
       "      <th>T_x__mean_change</th>\n",
       "      <th>T_x__mean_second_derivative_central</th>\n",
       "      <th>T_x__median</th>\n",
       "      <th>...</th>\n",
       "      <th>F_z__permutation_entropy__dimension_5__tau_1</th>\n",
       "      <th>F_z__permutation_entropy__dimension_6__tau_1</th>\n",
       "      <th>F_z__permutation_entropy__dimension_7__tau_1</th>\n",
       "      <th>F_z__query_similarity_count__query_None__threshold_0.0</th>\n",
       "      <th>F_z__matrix_profile__feature_\"min\"__threshold_0.98</th>\n",
       "      <th>F_z__matrix_profile__feature_\"max\"__threshold_0.98</th>\n",
       "      <th>F_z__matrix_profile__feature_\"mean\"__threshold_0.98</th>\n",
       "      <th>F_z__matrix_profile__feature_\"median\"__threshold_0.98</th>\n",
       "      <th>F_z__matrix_profile__feature_\"25\"__threshold_0.98</th>\n",
       "      <th>F_z__matrix_profile__feature_\"75\"__threshold_0.98</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>-43.0</td>\n",
       "      <td>125.0</td>\n",
       "      <td>0.214286</td>\n",
       "      <td>0.071429</td>\n",
       "      <td>0.038462</td>\n",
       "      <td>-3.0</td>\n",
       "      <td>...</td>\n",
       "      <td>1.972247</td>\n",
       "      <td>2.163956</td>\n",
       "      <td>2.197225</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>-53.0</td>\n",
       "      <td>363.0</td>\n",
       "      <td>3.785714</td>\n",
       "      <td>-0.071429</td>\n",
       "      <td>0.153846</td>\n",
       "      <td>-3.0</td>\n",
       "      <td>...</td>\n",
       "      <td>2.397895</td>\n",
       "      <td>2.302585</td>\n",
       "      <td>2.197225</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>-60.0</td>\n",
       "      <td>344.0</td>\n",
       "      <td>3.214286</td>\n",
       "      <td>0.071429</td>\n",
       "      <td>-0.076923</td>\n",
       "      <td>-5.0</td>\n",
       "      <td>...</td>\n",
       "      <td>2.397895</td>\n",
       "      <td>2.302585</td>\n",
       "      <td>2.197225</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>-93.0</td>\n",
       "      <td>763.0</td>\n",
       "      <td>3.714286</td>\n",
       "      <td>-0.428571</td>\n",
       "      <td>-0.192308</td>\n",
       "      <td>-6.0</td>\n",
       "      <td>...</td>\n",
       "      <td>2.271869</td>\n",
       "      <td>2.302585</td>\n",
       "      <td>2.197225</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>-105.0</td>\n",
       "      <td>849.0</td>\n",
       "      <td>4.071429</td>\n",
       "      <td>-0.357143</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>-8.0</td>\n",
       "      <td>...</td>\n",
       "      <td>2.271869</td>\n",
       "      <td>2.302585</td>\n",
       "      <td>2.197225</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>84</th>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>5083.0</td>\n",
       "      <td>1825597.0</td>\n",
       "      <td>18.857143</td>\n",
       "      <td>15.285714</td>\n",
       "      <td>-0.538462</td>\n",
       "      <td>394.0</td>\n",
       "      <td>...</td>\n",
       "      <td>1.366711</td>\n",
       "      <td>1.609438</td>\n",
       "      <td>1.831020</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>85</th>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>-511.0</td>\n",
       "      <td>18023.0</td>\n",
       "      <td>2.785714</td>\n",
       "      <td>-1.214286</td>\n",
       "      <td>0.192308</td>\n",
       "      <td>-33.0</td>\n",
       "      <td>...</td>\n",
       "      <td>1.972247</td>\n",
       "      <td>2.163956</td>\n",
       "      <td>2.197225</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>86</th>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>-987.0</td>\n",
       "      <td>67981.0</td>\n",
       "      <td>3.928571</td>\n",
       "      <td>-3.500000</td>\n",
       "      <td>-0.153846</td>\n",
       "      <td>-65.0</td>\n",
       "      <td>...</td>\n",
       "      <td>0.600166</td>\n",
       "      <td>0.639032</td>\n",
       "      <td>0.683739</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>87</th>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>-1921.0</td>\n",
       "      <td>247081.0</td>\n",
       "      <td>6.642857</td>\n",
       "      <td>-0.357143</td>\n",
       "      <td>0.461538</td>\n",
       "      <td>-126.0</td>\n",
       "      <td>...</td>\n",
       "      <td>1.366711</td>\n",
       "      <td>1.609438</td>\n",
       "      <td>1.831020</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>88</th>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>-304.0</td>\n",
       "      <td>6408.0</td>\n",
       "      <td>2.428571</td>\n",
       "      <td>-0.714286</td>\n",
       "      <td>0.230769</td>\n",
       "      <td>-21.0</td>\n",
       "      <td>...</td>\n",
       "      <td>2.397895</td>\n",
       "      <td>2.302585</td>\n",
       "      <td>2.197225</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>88 rows × 4722 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "    T_x__variance_larger_than_standard_deviation  T_x__has_duplicate_max  \\\n",
       "1                                            0.0                     1.0   \n",
       "2                                            1.0                     1.0   \n",
       "3                                            1.0                     0.0   \n",
       "4                                            1.0                     1.0   \n",
       "5                                            1.0                     0.0   \n",
       "..                                           ...                     ...   \n",
       "84                                           1.0                     0.0   \n",
       "85                                           1.0                     0.0   \n",
       "86                                           1.0                     0.0   \n",
       "87                                           1.0                     0.0   \n",
       "88                                           1.0                     1.0   \n",
       "\n",
       "    T_x__has_duplicate_min  T_x__has_duplicate  T_x__sum_values  \\\n",
       "1                      1.0                 1.0            -43.0   \n",
       "2                      1.0                 1.0            -53.0   \n",
       "3                      1.0                 1.0            -60.0   \n",
       "4                      0.0                 1.0            -93.0   \n",
       "5                      0.0                 1.0           -105.0   \n",
       "..                     ...                 ...              ...   \n",
       "84                     0.0                 1.0           5083.0   \n",
       "85                     0.0                 1.0           -511.0   \n",
       "86                     0.0                 1.0           -987.0   \n",
       "87                     0.0                 1.0          -1921.0   \n",
       "88                     0.0                 1.0           -304.0   \n",
       "\n",
       "    T_x__abs_energy  T_x__mean_abs_change  T_x__mean_change  \\\n",
       "1             125.0              0.214286          0.071429   \n",
       "2             363.0              3.785714         -0.071429   \n",
       "3             344.0              3.214286          0.071429   \n",
       "4             763.0              3.714286         -0.428571   \n",
       "5             849.0              4.071429         -0.357143   \n",
       "..              ...                   ...               ...   \n",
       "84        1825597.0             18.857143         15.285714   \n",
       "85          18023.0              2.785714         -1.214286   \n",
       "86          67981.0              3.928571         -3.500000   \n",
       "87         247081.0              6.642857         -0.357143   \n",
       "88           6408.0              2.428571         -0.714286   \n",
       "\n",
       "    T_x__mean_second_derivative_central  T_x__median  ...  \\\n",
       "1                              0.038462         -3.0  ...   \n",
       "2                              0.153846         -3.0  ...   \n",
       "3                             -0.076923         -5.0  ...   \n",
       "4                             -0.192308         -6.0  ...   \n",
       "5                              0.000000         -8.0  ...   \n",
       "..                                  ...          ...  ...   \n",
       "84                            -0.538462        394.0  ...   \n",
       "85                             0.192308        -33.0  ...   \n",
       "86                            -0.153846        -65.0  ...   \n",
       "87                             0.461538       -126.0  ...   \n",
       "88                             0.230769        -21.0  ...   \n",
       "\n",
       "    F_z__permutation_entropy__dimension_5__tau_1  \\\n",
       "1                                       1.972247   \n",
       "2                                       2.397895   \n",
       "3                                       2.397895   \n",
       "4                                       2.271869   \n",
       "5                                       2.271869   \n",
       "..                                           ...   \n",
       "84                                      1.366711   \n",
       "85                                      1.972247   \n",
       "86                                      0.600166   \n",
       "87                                      1.366711   \n",
       "88                                      2.397895   \n",
       "\n",
       "    F_z__permutation_entropy__dimension_6__tau_1  \\\n",
       "1                                       2.163956   \n",
       "2                                       2.302585   \n",
       "3                                       2.302585   \n",
       "4                                       2.302585   \n",
       "5                                       2.302585   \n",
       "..                                           ...   \n",
       "84                                      1.609438   \n",
       "85                                      2.163956   \n",
       "86                                      0.639032   \n",
       "87                                      1.609438   \n",
       "88                                      2.302585   \n",
       "\n",
       "    F_z__permutation_entropy__dimension_7__tau_1  \\\n",
       "1                                       2.197225   \n",
       "2                                       2.197225   \n",
       "3                                       2.197225   \n",
       "4                                       2.197225   \n",
       "5                                       2.197225   \n",
       "..                                           ...   \n",
       "84                                      1.831020   \n",
       "85                                      2.197225   \n",
       "86                                      0.683739   \n",
       "87                                      1.831020   \n",
       "88                                      2.197225   \n",
       "\n",
       "    F_z__query_similarity_count__query_None__threshold_0.0  \\\n",
       "1                                                 NaN        \n",
       "2                                                 NaN        \n",
       "3                                                 NaN        \n",
       "4                                                 NaN        \n",
       "5                                                 NaN        \n",
       "..                                                ...        \n",
       "84                                                NaN        \n",
       "85                                                NaN        \n",
       "86                                                NaN        \n",
       "87                                                NaN        \n",
       "88                                                NaN        \n",
       "\n",
       "    F_z__matrix_profile__feature_\"min\"__threshold_0.98  \\\n",
       "1                                                 NaN    \n",
       "2                                                 NaN    \n",
       "3                                                 NaN    \n",
       "4                                                 NaN    \n",
       "5                                                 NaN    \n",
       "..                                                ...    \n",
       "84                                                NaN    \n",
       "85                                                NaN    \n",
       "86                                                NaN    \n",
       "87                                                NaN    \n",
       "88                                                NaN    \n",
       "\n",
       "    F_z__matrix_profile__feature_\"max\"__threshold_0.98  \\\n",
       "1                                                 NaN    \n",
       "2                                                 NaN    \n",
       "3                                                 NaN    \n",
       "4                                                 NaN    \n",
       "5                                                 NaN    \n",
       "..                                                ...    \n",
       "84                                                NaN    \n",
       "85                                                NaN    \n",
       "86                                                NaN    \n",
       "87                                                NaN    \n",
       "88                                                NaN    \n",
       "\n",
       "    F_z__matrix_profile__feature_\"mean\"__threshold_0.98  \\\n",
       "1                                                 NaN     \n",
       "2                                                 NaN     \n",
       "3                                                 NaN     \n",
       "4                                                 NaN     \n",
       "5                                                 NaN     \n",
       "..                                                ...     \n",
       "84                                                NaN     \n",
       "85                                                NaN     \n",
       "86                                                NaN     \n",
       "87                                                NaN     \n",
       "88                                                NaN     \n",
       "\n",
       "    F_z__matrix_profile__feature_\"median\"__threshold_0.98  \\\n",
       "1                                                 NaN       \n",
       "2                                                 NaN       \n",
       "3                                                 NaN       \n",
       "4                                                 NaN       \n",
       "5                                                 NaN       \n",
       "..                                                ...       \n",
       "84                                                NaN       \n",
       "85                                                NaN       \n",
       "86                                                NaN       \n",
       "87                                                NaN       \n",
       "88                                                NaN       \n",
       "\n",
       "    F_z__matrix_profile__feature_\"25\"__threshold_0.98  \\\n",
       "1                                                 NaN   \n",
       "2                                                 NaN   \n",
       "3                                                 NaN   \n",
       "4                                                 NaN   \n",
       "5                                                 NaN   \n",
       "..                                                ...   \n",
       "84                                                NaN   \n",
       "85                                                NaN   \n",
       "86                                                NaN   \n",
       "87                                                NaN   \n",
       "88                                                NaN   \n",
       "\n",
       "    F_z__matrix_profile__feature_\"75\"__threshold_0.98  \n",
       "1                                                 NaN  \n",
       "2                                                 NaN  \n",
       "3                                                 NaN  \n",
       "4                                                 NaN  \n",
       "5                                                 NaN  \n",
       "..                                                ...  \n",
       "84                                                NaN  \n",
       "85                                                NaN  \n",
       "86                                                NaN  \n",
       "87                                                NaN  \n",
       "88                                                NaN  \n",
       "\n",
       "[88 rows x 4722 columns]"
      ]
     },
     "execution_count": 28,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Automatically extract the full default feature set, using id as the series identifier (column_id)\n",
    "# and time as the sort column (column_sort).\n",
    "# In the recorded output this yields one row per id (88 rows x 4722 columns); some columns, e.g. the\n",
    "# matrix_profile and query_similarity_count features, are NaN in every row shown.\n",
    "X_extracted = extract_features(timeseries,column_id = \"id\",column_sort = \"time\")\n",
    "X_extracted"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "id": "7dd713a5",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Feature Extraction: 100%|██████████████████████████████████████████████████████████████| 20/20 [00:04<00:00,  4.87it/s]\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>F_x__length</th>\n",
       "      <th>F_x__large_standard_deviation__r_0.05</th>\n",
       "      <th>F_x__large_standard_deviation__r_0.1</th>\n",
       "      <th>F_y__length</th>\n",
       "      <th>F_y__large_standard_deviation__r_0.05</th>\n",
       "      <th>F_y__large_standard_deviation__r_0.1</th>\n",
       "      <th>F_z__length</th>\n",
       "      <th>F_z__large_standard_deviation__r_0.05</th>\n",
       "      <th>F_z__large_standard_deviation__r_0.1</th>\n",
       "      <th>T_x__length</th>\n",
       "      <th>T_x__large_standard_deviation__r_0.05</th>\n",
       "      <th>T_x__large_standard_deviation__r_0.1</th>\n",
       "      <th>T_y__length</th>\n",
       "      <th>T_y__large_standard_deviation__r_0.05</th>\n",
       "      <th>T_y__large_standard_deviation__r_0.1</th>\n",
       "      <th>T_z__length</th>\n",
       "      <th>T_z__large_standard_deviation__r_0.05</th>\n",
       "      <th>T_z__large_standard_deviation__r_0.1</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>15.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>15.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>15.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>15.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>15.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>15.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>15.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>15.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>15.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>15.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>15.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>15.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>15.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>15.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>15.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>15.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>15.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>15.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>15.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>15.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>15.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>15.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>15.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>15.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>15.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>15.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>15.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>15.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>15.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>15.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>84</th>\n",
       "      <td>15.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>15.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>15.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>15.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>15.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>15.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>85</th>\n",
       "      <td>15.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>15.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>15.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>15.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>15.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>15.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>86</th>\n",
       "      <td>15.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>15.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>15.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>15.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>15.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>15.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>87</th>\n",
       "      <td>15.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>15.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>15.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>15.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>15.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>15.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>88</th>\n",
       "      <td>15.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>15.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>15.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>15.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>15.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>15.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>88 rows × 18 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "    F_x__length  F_x__large_standard_deviation__r_0.05  \\\n",
       "1          15.0                                    1.0   \n",
       "2          15.0                                    1.0   \n",
       "3          15.0                                    1.0   \n",
       "4          15.0                                    1.0   \n",
       "5          15.0                                    1.0   \n",
       "..          ...                                    ...   \n",
       "84         15.0                                    1.0   \n",
       "85         15.0                                    1.0   \n",
       "86         15.0                                    1.0   \n",
       "87         15.0                                    1.0   \n",
       "88         15.0                                    1.0   \n",
       "\n",
       "    F_x__large_standard_deviation__r_0.1  F_y__length  \\\n",
       "1                                    1.0         15.0   \n",
       "2                                    1.0         15.0   \n",
       "3                                    1.0         15.0   \n",
       "4                                    1.0         15.0   \n",
       "5                                    1.0         15.0   \n",
       "..                                   ...          ...   \n",
       "84                                   1.0         15.0   \n",
       "85                                   1.0         15.0   \n",
       "86                                   1.0         15.0   \n",
       "87                                   1.0         15.0   \n",
       "88                                   1.0         15.0   \n",
       "\n",
       "    F_y__large_standard_deviation__r_0.05  \\\n",
       "1                                     1.0   \n",
       "2                                     1.0   \n",
       "3                                     1.0   \n",
       "4                                     1.0   \n",
       "5                                     1.0   \n",
       "..                                    ...   \n",
       "84                                    1.0   \n",
       "85                                    1.0   \n",
       "86                                    1.0   \n",
       "87                                    1.0   \n",
       "88                                    1.0   \n",
       "\n",
       "    F_y__large_standard_deviation__r_0.1  F_z__length  \\\n",
       "1                                    1.0         15.0   \n",
       "2                                    1.0         15.0   \n",
       "3                                    1.0         15.0   \n",
       "4                                    1.0         15.0   \n",
       "5                                    1.0         15.0   \n",
       "..                                   ...          ...   \n",
       "84                                   1.0         15.0   \n",
       "85                                   1.0         15.0   \n",
       "86                                   1.0         15.0   \n",
       "87                                   1.0         15.0   \n",
       "88                                   1.0         15.0   \n",
       "\n",
       "    F_z__large_standard_deviation__r_0.05  \\\n",
       "1                                     1.0   \n",
       "2                                     1.0   \n",
       "3                                     1.0   \n",
       "4                                     1.0   \n",
       "5                                     1.0   \n",
       "..                                    ...   \n",
       "84                                    1.0   \n",
       "85                                    1.0   \n",
       "86                                    1.0   \n",
       "87                                    1.0   \n",
       "88                                    1.0   \n",
       "\n",
       "    F_z__large_standard_deviation__r_0.1  T_x__length  \\\n",
       "1                                    1.0         15.0   \n",
       "2                                    1.0         15.0   \n",
       "3                                    1.0         15.0   \n",
       "4                                    1.0         15.0   \n",
       "5                                    1.0         15.0   \n",
       "..                                   ...          ...   \n",
       "84                                   1.0         15.0   \n",
       "85                                   1.0         15.0   \n",
       "86                                   1.0         15.0   \n",
       "87                                   1.0         15.0   \n",
       "88                                   1.0         15.0   \n",
       "\n",
       "    T_x__large_standard_deviation__r_0.05  \\\n",
       "1                                     1.0   \n",
       "2                                     1.0   \n",
       "3                                     1.0   \n",
       "4                                     1.0   \n",
       "5                                     1.0   \n",
       "..                                    ...   \n",
       "84                                    1.0   \n",
       "85                                    1.0   \n",
       "86                                    1.0   \n",
       "87                                    1.0   \n",
       "88                                    1.0   \n",
       "\n",
       "    T_x__large_standard_deviation__r_0.1  T_y__length  \\\n",
       "1                                    1.0         15.0   \n",
       "2                                    1.0         15.0   \n",
       "3                                    1.0         15.0   \n",
       "4                                    1.0         15.0   \n",
       "5                                    1.0         15.0   \n",
       "..                                   ...          ...   \n",
       "84                                   1.0         15.0   \n",
       "85                                   1.0         15.0   \n",
       "86                                   1.0         15.0   \n",
       "87                                   1.0         15.0   \n",
       "88                                   1.0         15.0   \n",
       "\n",
       "    T_y__large_standard_deviation__r_0.05  \\\n",
       "1                                     1.0   \n",
       "2                                     1.0   \n",
       "3                                     1.0   \n",
       "4                                     1.0   \n",
       "5                                     1.0   \n",
       "..                                    ...   \n",
       "84                                    1.0   \n",
       "85                                    1.0   \n",
       "86                                    1.0   \n",
       "87                                    1.0   \n",
       "88                                    1.0   \n",
       "\n",
       "    T_y__large_standard_deviation__r_0.1  T_z__length  \\\n",
       "1                                    1.0         15.0   \n",
       "2                                    1.0         15.0   \n",
       "3                                    1.0         15.0   \n",
       "4                                    1.0         15.0   \n",
       "5                                    1.0         15.0   \n",
       "..                                   ...          ...   \n",
       "84                                   1.0         15.0   \n",
       "85                                   1.0         15.0   \n",
       "86                                   1.0         15.0   \n",
       "87                                   1.0         15.0   \n",
       "88                                   1.0         15.0   \n",
       "\n",
       "    T_z__large_standard_deviation__r_0.05  \\\n",
       "1                                     0.0   \n",
       "2                                     1.0   \n",
       "3                                     1.0   \n",
       "4                                     1.0   \n",
       "5                                     1.0   \n",
       "..                                    ...   \n",
       "84                                    1.0   \n",
       "85                                    1.0   \n",
       "86                                    1.0   \n",
       "87                                    1.0   \n",
       "88                                    1.0   \n",
       "\n",
       "    T_z__large_standard_deviation__r_0.1  \n",
       "1                                    0.0  \n",
       "2                                    1.0  \n",
       "3                                    1.0  \n",
       "4                                    1.0  \n",
       "5                                    1.0  \n",
       "..                                   ...  \n",
       "84                                   1.0  \n",
       "85                                   1.0  \n",
       "86                                   1.0  \n",
       "87                                   1.0  \n",
       "88                                   1.0  \n",
       "\n",
       "[88 rows x 18 columns]"
      ]
     },
     "execution_count": 29,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 选择性生成特征\n",
    "fc_parameters = {\n",
    "    \"length\": None,\n",
    "    \"large_standard_deviation\": [{\"r\": 0.05}, {\"r\": 0.1}]\n",
    "}\n",
    "extract_features(timeseries, column_id = \"id\",column_sort = \"time\",default_fc_parameters=fc_parameters)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 55,
   "id": "80d917ee",
   "metadata": {},
   "outputs": [],
   "source": [
    "# 自动特征选择\n",
    "X_extracted_cols = X_extracted.isnull().sum().where(lambda x : x==0).dropna().index  # 由于不是所有生成的变量都是有意义的，删除掉包含NA的特征，这也是特征选择函数的要求\n",
    "X_selected = select_features(X_extracted[X_extracted_cols], y)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 61,
   "id": "2e40a627",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "count of raw feature: 2203\n",
      "count of auto-selected feature: 665\n"
     ]
    }
   ],
   "source": [
    "# fresh算法自动从2203个特征中选择出了665个\n",
    "print('count of raw feature: {}'.format(len(X_extracted_cols)))\n",
    "print('count of auto-selected feature: {}'.format(len(X_selected.columns)))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "bba4afa3",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.4"
  },
  "toc": {
   "base_numbering": 1,
   "nav_menu": {},
   "number_sections": false,
   "sideBar": true,
   "skip_h1_title": false,
   "title_cell": "Table of Contents",
   "title_sidebar": "Contents",
   "toc_cell": false,
   "toc_position": {},
   "toc_section_display": true,
   "toc_window_display": false
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
